package org.platon.template;

import ai.platon.pulsar.context.PulsarContexts;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.session.PulsarSession;
import com.google.gson.Gson;
import org.jsoup.nodes.Element;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Small command-line example that scrapes the {@code desc} field from a
 * Xiaohongshu note page using a Pulsar session and prints it as JSON.
 */
public class XHS_DESC {
    /** Session shared by every call in this class; created once when the class is loaded. */
    public static PulsarSession session = PulsarContexts.createSession();

    /** Reusable JSON serializer, so a new instance is not allocated on every call. */
    private static final Gson GSON = new Gson();

    /**
     * Entry point: scrapes a fixed sample note URL and prints the result.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // The shared static session is used by getDesc(); no second session is created here.
        String url = "https://www.xiaohongshu.com/explore/648fd062000000001300f4c3";
        String desc = getDesc(url);
        System.out.println("desc = " + desc);
    }

    /**
     * Loads the given page, scrapes the {@code desc} field from it and returns
     * the scraped rows serialized as a JSON array.
     *
     * <p>The JSON is also printed to standard output.
     *
     * @param url the page to load and scrape
     * @return the scraped fields as a JSON string
     */
    public static String getDesc(String url) {
        // Load the page with the refresh option so that the scrape below
        // (which accepts pages up to one day old) works on a fresh copy.
        session.load(url, "-expires 1d -refresh -parse ");

        // Load, parse and scrape the named fields from every element matching the restrict selector.
        // NOTE(review): "li[data-sku]" looks like it was copied from an e-commerce
        // example — confirm that it matches the markup of the target page.
        List<Map<String, String>> fields = session.scrape(url, "-i 1d", "li[data-sku]",
                Map.of("desc", ".desc"));

        // Serialize once and reuse the string for both the log line and the return value.
        String json = GSON.toJson(fields);
        System.out.println(json);
        return json;
    }

}
